/* 
Collapse worker outcomes data to county-level panels by category

[] brackets indicate internal census variable name that can't be disclosed. 

*/

* define libraries;
libname hannah '/projects/users/########/Snapshot2022/IntermediateData';
libname hantmp '/projects/users/########/Snapshot2022/IntermediateData/TempData';
libname ans '/projects/users/########/Snapshot2022/AnalysisData';

* get county centroids;
* projections are slightly different so use two sources just in case;
data work.counties;
   set maps.county;
   where state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56);  /* FIPS codes for our states */
   format state z2. county z3.;
   state2 = put(state,z2. -L);
   county2 = put(county,z3. -L);
   ctyfips = cats(state2,county2);
run;
%CENTROID(work.counties,work.countycenters,ctyfips);
data work.countycenters;
set work.countycenters;
lat = y*45/atan(1);
long = -x*45/atan(1);
y1 = lat;
x1 = long;
drop lat long x y;
run;

data test;
  set mapsgfk.us_counties (where=(state in (01 04 05 06 08 10 11 17 18 19 20 23 24 29 30 31 32 35 38 40 47 51 56)));
  x = long;
  y = lat;
run;
%CENTROID(work.test,work.countycenters2,ID);
data work.countycenters2;
set work.countycenters2;
ctyfips = substr(ID,4,5);
run;

data work.countycenters;
  merge work.countycenters work.countycenters2 (keep = x y ctyfips);
  by ctyfips;
run;

/* do I need a step here? */
/*
* collapse ehf_icf_analysis (worker data) to county-year level; 
data ans.ehf_icf_analysis;
set ans.ehf_icf_analysis;
ctyfips = substr([internal geocode],1,5);
yc = [internal latitude]/1000000;
xc = [internal longitude]/1000000;
run;
*/

/* merge in category dummies */
proc sql;
  create table hantmp.biganalysisdata as
  select 
    A.*, B.*
  from ans.ehf_icf_analysis as A
    left join hannah.pikspine as B on A.pik = B.pik
  order by A.pik, A.year;
quit;

/* all */
proc sql;
  CREATE TABLE hannah.county_wrkrpanel_2022 AS
  SELECT [county id], year,
  COUNT(*) as tot_pop,
  SUM(case when unemp_instate<0.75 then 1 else 0 end) as tot_work_pop,
  SUM(case when unemp_instate<0.75 then [annual earnings] else 0 end) as tot_emp_earn,
  SUM(unemp_instate) as total_unemp_wrkrs,
  SUM(case when white=1 then 1 else 0 end) as white_pop,
  SUM(case when white=1 and unemp_instate<0.75 then 1 else 0 end) as white_work_pop,
  SUM(case when white=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as white_emp_earn,
  SUM(case when white=1 then unemp_instate else 0 end) as white_unemp_wrkrs,
  SUM(case when black=1 then 1 else 0 end) as black_pop,
  SUM(case when black=1 and unemp_instate<0.75 then 1 else 0 end) as black_work_pop,
  SUM(case when black=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as black_emp_earn,
  SUM(case when black=1 then unemp_instate else 0 end) as black_unemp_wrkrs,
  SUM(case when ind_nat=1 then 1 else 0 end) as indnat_pop,
  SUM(case when ind_nat=1 and unemp_instate<0.75 then 1 else 0 end) as indnat_work_pop,
  SUM(case when ind_nat=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as indnat_emp_earn,
  SUM(case when ind_nat=1 then unemp_instate else 0 end) as indnat_unemp_wrkrs,
  SUM(case when hispanic=1 then 1 else 0 end) as hisp_pop,
  SUM(case when hispanic=1 and unemp_instate<0.75 then 1 else 0 end) as hisp_work_pop,
  SUM(case when hispanic=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as hisp_emp_earn,
  SUM(case when hispanic=1 then unemp_instate else 0 end) as hisp_unemp_wrkrs,
  SUM(case when female=1 then 1 else 0 end) as female_pop,
  SUM(case when female=1 and unemp_instate<0.75 then 1 else 0 end) as female_work_pop,
  SUM(case when female=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as female_emp_earn,
  SUM(case when female=1 then unemp_instate else 0 end) as female_unemp_wrkrs,
  SUM(case when female=0 then 1 else 0 end) as male_pop,
  SUM(case when female=0 and unemp_instate<0.75 then 1 else 0 end) as male_work_pop,
  SUM(case when female=0 and unemp_instate<0.75 then [annual earnings] else 0 end) as male_emp_earn,
  SUM(case when female=0 then unemp_instate else 0 end) as male_unemp_wrkrs,
  SUM(case when nohigh=1 then 1 else 0 end) as nohigh_pop,
  SUM(case when nohigh=1 and unemp_instate<0.75 then 1 else 0 end) as nohigh_work_pop,
  SUM(case when nohigh=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as nohigh_emp_earn,
  SUM(case when nohigh=1 then unemp_instate else 0 end) as nohigh_unemp_wrkrs,
  SUM(case when highsch=1 then 1 else 0 end) as highsch_pop,
  SUM(case when highsch=1 and unemp_instate<0.75 then 1 else 0 end) as highsch_work_pop,
  SUM(case when highsch=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as highsch_emp_earn,
  SUM(case when highsch=1 then unemp_instate else 0 end) as highsch_unemp_wrkrs,
  SUM(case when somecoll=1 then 1 else 0 end) as somecoll_pop,
  SUM(case when somecoll=1 and unemp_instate<0.75 then 1 else 0 end) as somecoll_work_pop,
  SUM(case when somecoll=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as somecoll_emp_earn,
  SUM(case when somecoll=1 then unemp_instate else 0 end) as somecoll_unemp_wrkrs,
  SUM(case when college=1 then 1 else 0 end) as college_pop,
  SUM(case when college=1 and unemp_instate<0.75 then 1 else 0 end) as college_work_pop,
  SUM(case when college=1 and unemp_instate<0.75 then [annual earnings] else 0 end) as college_emp_earn,
  SUM(case when college=1 then unemp_instate else 0 end) as college_unemp_wrkrs,
  MEAN(x) as meanx,
  MEAN(y) as meany
  FROM hantmp.biganalysisdata
  GROUP BY [county id], year
  ORDER BY [county id], year;
quit;

PROC DATASETS library = hantmp nolist;
  delete biganalysisdata;
RUN; QUIT;

data hannah.county_wrkrpanel_2022;
set hannah.county_wrkrpanel_2022;
  ctyfips = [county id];
run;

data hannah.county_wrkrpanel_2022;
  merge hannah.county_wrkrpanel_2022 work.countycenters;
  by ctyfips;
run;



